home *** CD-ROM | disk | FTP | other *** search
- //
- // BEGIN FLOCK GPL
- //
- // Copyright Flock Inc. 2005-2007
- // http://flock.com
- //
- // This file may be used under the terms of the
- // GNU General Public License Version 2 or later (the "GPL"),
- // http://www.gnu.org/licenses/gpl.html
- //
- // Software distributed under the License is distributed on an "AS IS" basis,
- // WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- // for the specific language governing rights and limitations under the
- // License.
- //
- // END FLOCK GPL
- //
-
// Shorthand for the XPCOM entry points used throughout this file.
const Cc = Components.classes;
const Ci = Components.interfaces;
const Cr = Components.results;

// XPCOM registration data for this component.
const CLASS_ID = Components.ID("{C2BFF231-7A51-4764-8409-A1B22B2F5147}");
const CLASS_NAME = "Flock Indexer";
const CONTRACT_ID = "@flock.com/indexer;1";

// RDF namespaces and the fully qualified predicates built from them.
const FLOCK_NS = "http://flock.com/rdf#";
const NSCP_NS = "http://home.netscape.com/NC-rdf#";

const PROP_URL = NSCP_NS + "URL";
const PROP_NAME = NSCP_NS + "Name";
const PROP_CHILD = NSCP_NS + "child";

/* File open flags, from nspr's prio.h */
const PR_RDONLY = 0x01;
const PR_WRONLY = 0x02;
const PR_RDWR = 0x04;
const PR_CREATE_FILE = 0x08;
const PR_APPEND = 0x10;
const PR_TRUNCATE = 0x20;
const PR_SYNC = 0x40;
const PR_EXCL = 0x80;

// Operation codes: the first element of each [op, uri] pair in the queue.
const OP_DELETE = 0;
const OP_ADD_FLOCK = 1;
const OP_ADD_HISTORY = 2;
-
/**
 * Constructor. Only registers the new instance for the "xpcom-shutdown"
 * notification; all other state is set up later by init().
 */
function flockIndexer() {
  var observerService = Cc["@mozilla.org/observer-service;1"]
                        .getService(Ci.nsIObserverService);
  observerService.addObserver(this, "xpcom-shutdown", false);
}
-
- flockIndexer.prototype = {
-
- init: function INDEXER_init() {
- this._enabled = false;
- this._enabledWebHistory = false;
-
- this._logger = Cc["@flock.com/logger;1"].createInstance(Ci.flockILogger);
- this._logger.init("indexer");
-
- this._logger.info("starting up...");
-
- this._RDFS = Cc["@mozilla.org/rdf/rdf-service;1"]
- .getService(Ci.nsIRDFService);
- this._resURL = this._RDFS.GetResource(PROP_URL);
- this._resName = this._RDFS.GetResource(PROP_NAME);
- this._resHistoryRoot = this._RDFS.GetResource("NC:HistoryRoot");
- this._resHistoryChild = this._RDFS.GetResource(PROP_CHILD);
- this._resIsIndexable = this._RDFS.GetResource(FLOCK_NS + "isIndexable");
- this._resTitle = this._RDFS.GetResource(NSCP_NS + "Name");
- this._resDescription = this._RDFS.GetResource(NSCP_NS + "Description");
- this._resTags = this._RDFS.GetResource(FLOCK_NS + "tags");
-
- this._searchService = Cc["@flock.com/lucene/flockLucene;1"]
- .getService(Ci.flockILucene);
- this._favService = Cc["@mozilla.org/rdf/datasource;1?name=flock-favorites"]
- .getService(Ci.flockIRDFObservable);
- this._historyRdf = Cc["@mozilla.org/rdf/datasource;1?name=history"]
- .getService(Ci.nsIRDFDataSource);
- this._ios = Cc["@mozilla.org/network/io-service;1"]
- .getService(Ci.nsIIOService);
-
- this._coop = Cc["@flock.com/singleton;1"].getService(Ci.flockISingleton)
- .getSingleton("chrome://browser/content/flock/common/load-faves-coop.js")
- .wrappedJSObject;
-
- var dirService = Cc["@mozilla.org/file/directory_service;1"]
- .getService(Ci.nsIProperties);
- var profileDir = dirService.get("ProfD", Ci.nsIFile);
-
- // Clean up old cardinal index dir
- var oldHistorySearchDir = profileDir.clone();
- oldHistorySearchDir.append("historysearch");
- try {
- oldHistorySearchDir.remove(true);
- }
- catch (ex) {
- }
-
- // initialize Lucene
- var luceneDir = profileDir.clone();
- luceneDir.append("lucene");
- this._searchService.init(luceneDir);
-
- // initialize the queue
- this._initQueue();
-
- // init queue processing timer
- this._timer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
-
- // init page text store
- this._pageText = {};
-
- // create tokenizer
- this._tokenizer = Cc["@flock.com/tokenizer;1"].createInstance(Ci.flockITokenizer);
-
- // init with current prefs
- this.observe(null, "nsPref:changed", null);
-
- // watch for pref changes
- var prefService = Cc["@mozilla.org/preferences-service;1"]
- .getService(Ci.nsIPrefBranch2);
- prefService.addObserver("flock.service.indexer.enabled", this, false);
- prefService.addObserver("flock.service.indexer.indexWebHistory", this, false);
- },
-
- rebuildIndex: function INDEXER_rebuildIndex() {
- this._logger.info("rebuilding index...");
-
- // reindex history
- var records = this._historyRdf.GetTargets(this._resHistoryRoot,
- this._resHistoryChild,
- true);
- var record, url;
- while (records.hasMoreElements()) {
- record = records.getNext().QueryInterface(Ci.nsIRDFResource);
- try {
- url = this._ios.newURI(record.ValueUTF8, null, null);
- } catch (e) {
- url = null;
- }
- if (url && url.scheme == "http") {
- this._addOp(OP_ADD_HISTORY, "history:" + url.spec);
- }
- }
- },
-
- _getQueueFile: function INDEXER__getQueueFile() {
- var file = Cc["@mozilla.org/file/directory_service;1"]
- .getService(Ci.nsIProperties).get("ProfD", Ci.nsILocalFile);
- file.append("indexerQueue.js");
- return file;
- },
-
- _initQueue: function INDEXER__initQueue() {
- var queue;
-
- try {
- var file = this._getQueueFile();
-
- var stream = Cc["@mozilla.org/network/file-input-stream;1"]
- .createInstance(Ci.nsIFileInputStream);
- stream.init(file, PR_RDONLY, 0, 0);
-
- var cvstream = Cc["@mozilla.org/intl/converter-input-stream;1"]
- .createInstance(Ci.nsIConverterInputStream);
- cvstream.init(stream, "UTF-8", 1024,
- Ci.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
-
- var content = "";
- var data = {};
- while (cvstream.readString(4096, data)) {
- content += data.value;
- }
- cvstream.close();
-
- queue = content.replace(/\r\n?/g, "\n");
- file.remove(false);
- } catch (e) {
- queue = null;
- }
-
- if (queue) {
- try {
- var s = new Components.utils.Sandbox("about:blank");
- this._queue = Components.utils.evalInSandbox(queue, s);
- this._logger.info("restored queue");
- return;
- } catch (e) {
- this._logger.error("unable to restore queue");
- }
- }
-
- // default to empty queue
- this._queue = [];
- },
-
- _saveQueue: function INDEXER__saveQueue() {
- // do nothing if queue is empty
- if (this._queue.length == 0)
- return;
-
- try {
- this._logger.info("saving queue...");
- var file = this._getQueueFile();
-
- var ostream = Cc["@mozilla.org/network/safe-file-output-stream;1"]
- .createInstance(Ci.nsIFileOutputStream);
- ostream.init(file, PR_WRONLY | PR_CREATE_FILE | PR_TRUNCATE, 0600, 0);
-
- var converter = Cc["@mozilla.org/intl/scriptableunicodeconverter"]
- .createInstance(Ci.nsIScriptableUnicodeConverter);
- converter.charset = "UTF-8";
-
- var data = this._queue.toSource();
- var convdata = converter.ConvertFromUnicode(data) + converter.Finish();
-
- ostream.write(convdata, convdata.length);
-
- if (ostream instanceof Ci.nsISafeOutputStream) {
- ostream.finish();
- } else {
- ostream.close();
- }
- } catch (e) {
- this._logger.error("unable to save queue");
- }
- },
-
- _enable: function INDEXER__enable() {
- if (!this._enabled) {
- // start watching the favorites and history graphs
- this._favService.addArcObserver(Ci.flockIRDFObserver.TYPE_ALL, null,
- this._resIsIndexable, null, this);
- this._favService.addArcObserver(Ci.flockIRDFObserver.TYPE_CHANGE, null,
- this._resTitle, null, this);
- this._favService.addArcObserver(Ci.flockIRDFObserver.TYPE_CHANGE, null,
- this._resDescription, null, this);
- this._favService.addArcObserver(Ci.flockIRDFObserver.TYPE_CHANGE, null,
- this._resTags, null, this);
- this._historyRdf.AddObserver(this);
- this._enabled = true;
- this._enabledWebHistory = true;
- this._logger.info("indexing enabled");
- }
- },
-
- _disable: function INDEXER__disable() {
- if (this._enabled) {
- // stop watching the favorites and history graphs
- this._favService.removeArcObserver(Ci.flockIRDFObserver.TYPE_ALL, null,
- this._resIsIndexable, null, this);
- this._favService.removeArcObserver(Ci.flockIRDFObserver.TYPE_CHANGE, null,
- this._resTitle, null, this);
- this._favService.removeArcObserver(Ci.flockIRDFObserver.TYPE_CHANGE, null,
- this._resDescription, null, this);
- this._favService.removeArcObserver(Ci.flockIRDFObserver.TYPE_CHANGE, null,
- this._resTags, null, this);
- this._historyRdf.RemoveObserver(this);
- this._enabled = false;
- this._enabledWebHistory = false;
- this._logger.info("indexing disabled");
- }
- },
-
- _enableWebHistory: function INDEXER__enableWebHistory() {
- if (this._enabled) {
- if (!this._enabledWebHistory) {
- this._historyRdf.AddObserver(this);
- this._enabledWebHistory = true;
- this._logger.info("web history indexing enabled");
- }
- }
- },
-
- _disableWebHistory: function INDEXER__disableWebHistory() {
- if (this._enabled) {
- if (this._enabledWebHistory) {
- this._historyRdf.RemoveObserver(this);
- this._enabledWebHistory = false;
- this._logger.info("web history indexing disabled");
- }
- }
- },
-
- _shutdown: function INDEXER__shutdown() {
- this._disable();
- this._timer.cancel();
- this._saveQueue();
- this._logger.info("shut down");
- },
-
- _processQueue: function INDEXER__processQueue(sync) {
- var batchSize = 1;
- var c = 0;
- while (this._queue.length > 0 && c < batchSize) {
- c++;
- var op = this._queue[0];
- var instruction = op[0];
- var uri = op[1];
- switch (instruction) {
- case OP_DELETE:
- this._logger.info("removing " + uri);
- if (sync) {
- this._searchService.deleteDocumentSync(uri);
- } else {
- this._searchService.deleteDocument(uri, this);
- }
- break;
-
- case OP_ADD_FLOCK:
- var obj = this._coop.get(uri);
- if (!obj) {
- var msg = "trying to add nonexistent object: " + uri;
- this._logger.error(msg);
- throw Components.Exception(msg, Cr.NS_ERROR_UNEXPECTED);
- }
-
- var url = obj.URL;
- if (!url) {
- this._logger.warn("unable to get URL for " + uri);
- url = "";
- }
-
- var type = obj.flockType;
- var title = obj.name;
- if (obj.tags) {
- var tags = obj.tags;
- } else {
- var tags = "";
- }
- if (obj.description) {
- var description = obj.description;
- } else {
- var description = "";
- }
- var text = "";
- this._logger.info("indexing " + uri + " type: " + type + " title: " + title + " url: " + url);
-
- if (sync) {
- this._searchService.addDocumentSync(uri, type, url, title, tags, description, text);
- } else {
- this._searchService.addDocument(uri, type, url, title, tags, description, text, this);
- }
- break;
-
- case OP_ADD_HISTORY:
- this._logger.info("indexing " + uri + " type: history");
- if (!sync) {
- this._addHistoryDocument(uri);
- }
- break;
- }
- }
- },
-
- _addHistoryDocument: function INDEXER__addHistoryDocument(aURI) {
- var uri = aURI;
- var url = uri.substr("history:".length);
- var title = "";
- var tags = "";
- var description = "";
- var data = "";
-
- var resource = this._historyRdf.GetSource(this._resURL,
- this._RDFS.GetResource(url),
- true);
- if (resource) {
- var title_node = this._historyRdf.GetTarget(resource, this._resName, true);
- if (title_node && title_node.QueryInterface(Ci.nsIRDFLiteral)) {
- title = title_node.Value;
- this._logger.debug("got title");
- }
- }
- if (url in this._pageText) {
- data = this._pageText[url];
- delete this._pageText[url];
- }
- this._searchService.addDocument(uri, "history", url, title, tags, description, data, this);
- this._logger.debug("document indexed text: " + data);
- },
-
- _addOp: function INDEXER__addOp(aOp, aURI) {
- this._queue.push([aOp, aURI]);
- this._logger.info("queued operation op: " + aOp + " uri: " + aURI);
- this._timer.initWithCallback(this, 250, Ci.nsITimer.TYPE_ONE_SHOT);
- },
-
- _retireOp: function INDEXER__retireOp() {
- this._queue.shift();
- if (this._queue.length > 0) {
- this._timer.initWithCallback(this, 0, Ci.nsITimer.TYPE_ONE_SHOT);
- }
- },
-
- _handlePageLoad: function INDEXER_handlePageLoad(document) {
- var url = document.documentURI;
- this._logger.debug("_handlePageLoad: " + url);
- this._pageText[url] = this._tokenizer.tokenizeDOMNode(document, document.body);
- this._addOp(OP_ADD_HISTORY, "history:" + url);
- },
-
- // nsIDOMEventListener
- handleEvent: function INDEXER_handleEvent(aEvent) {
- switch(aEvent.type) {
- case "load":
- var url;
- try {
- url = this._ios.newURI(aEvent.originalTarget.documentURI, null, null);
- } catch (e) { }
- if (url && url.scheme == "http") {
- this._handlePageLoad(aEvent.originalTarget);
- }
- break;
- }
- },
-
- // flockIMigratable
- get shortname() { return 'Indexer'; },
-
- // flockIMigratable
- needsMigration: function INDEXER_needsMigration(oldVersion) {
- return oldVersion.substr(0, 3) == "0.7";
- },
-
- // flockIMigratable
- startMigration: function INDEXER_startMigration(oldVersion, listener) {
- return null;
- },
-
- // flockIMigratable
- finishMigration: function INDEXER_finishMigration(ctxtWrapper) {
- },
-
- // flockIMigratable
- doMigrationWork: function INDEXER_doMigrationWork(ctxtWrapper) {
- this.rebuildIndex();
- return false;
- },
-
- // flockILuceneListener
- onAddDocumentComplete: function INDEXER_onAddDocumentComplete(aURI) {
- this._logger.debug("onAddDocumentComplete: " + aURI);
- this._retireOp();
- },
-
- // flockILuceneListener
- onDeleteDocumentComplete: function INDEXER_onDeleteDocumentComplete(aURI) {
- this._logger.debug("onDeleteDocumentComplete: " + aURI);
- this._retireOp();
- },
-
- // nsITimerCallback
- notify: function INDEXER_notify(timer) {
- this._processQueue(false);
- },
-
- // flockIRDFObserver
- rdfChanged: function INDEXER__rdfChanged(ds, type, rsrc, pred, obj, oldObj) {
- if (pred == this._resIsIndexable) {
- switch (type) {
- case Ci.flockIRDFObserver.TYPE_ASSERT:
- var indexable = this._coop.get_from_resource(rsrc);
- if (indexable.isIndexable) {
- this._addOp(OP_ADD_FLOCK, indexable.id());
- }
- break;
-
- case Ci.flockIRDFObserver.TYPE_CHANGE:
- var indexable = this._coop.get_from_resource(rsrc);
- var op = indexable.isIndexable ? OP_ADD_FLOCK : OP_DELETE;
- this._addOp(op, indexable.id());
- break;
-
- case Ci.flockIRDFObserver.TYPE_UNASSERT:
- rsrc.QueryInterface(Ci.nsIRDFResource);
- this._addOp(OP_DELETE, rsrc.ValueUTF8);
- break;
- }
- } else {
- var indexable = this._coop.get_from_resource(rsrc);
- if (indexable && indexable.isIndexable) {
- this._addOp(OP_ADD_FLOCK, indexable.id());
- }
- }
- },
-
- // nsIRDFObserver
- onAssert: function INDEXER_onAssert(ds, source, predicate, target) {
- if (predicate.ValueUTF8 == PROP_NAME &&
- ds.HasAssertion(this._resHistoryRoot, this._resHistoryChild,
- source, true)) {
- var url = null;
- try {
- url = this._ios.newURI(source.ValueUTF8, null, null);
- }
- catch (e) { }
- if (url && url.scheme == "http") {
- this._addOp(OP_ADD_HISTORY, "history:" + url.spec);
- }
- }
- },
-
- // nsIRDFObserver
- onUnassert: function INDEXER_onUnassert(ds, source, predicate, target) {
- if (source.ValueUTF8 == "NC:HistoryRoot" &&
- predicate.ValueUTF8 == PROP_CHILD) {
- target.QueryInterface(Ci.nsIRDFResource);
- var url = null;
- try {
- url = this._ios.newURI(target.ValueUTF8, null, null);
- } catch (e) {}
- if (url && url.scheme == "http") {
- this._addOp(OP_DELETE, "history:" + url.spec);
- }
- }
- },
-
- // nsIRDFObserver
- onChange: function INDEXER_onChange(ds, source, predicate, oldTarget, newTarget) {
- if (predicate.ValueUTF8 == PROP_NAME &&
- ds.HasAssertion(this._resHistoryRoot, this._resHistoryChild,
- source, true)) {
- var url = null;
- try {
- url = this._ios.newURI(source.ValueUTF8, null, null);
- } catch (e) {}
- if (url && url.scheme == "http") {
- this._addOp(OP_ADD_HISTORY, "history:" + url.spec);
- }
- }
- },
-
- // nsIRDFObserver
- onMove: function INDEXER_onMove(ds, oldSource, newSource, predicate, target) {
- },
-
- // nsIRDFObserver
- onBeginUpdateBatch: function INDEXER_onBeginUpdateBatch(ds) {
- },
-
- // nsIRDFObserver
- onEndUpdateBatch: function INDEXER_onEndUpdateBatch(ds) {
- },
-
- // nsIObserver
- observe: function INDEXER_observe(subject, topic, state) {
- switch (topic) {
- case "xpcom-shutdown":
- var obs = Cc["@mozilla.org/observer-service;1"]
- .getService(Ci.nsIObserverService);
- obs.removeObserver(this, "xpcom-shutdown");
- this._shutdown();
- return;
- case "nsPref:changed":
- var prefService = Cc["@mozilla.org/preferences-service;1"]
- .getService(Ci.nsIPrefBranch);
- if (prefService.getPrefType("flock.service.indexer.enabled")) {
- if (prefService.getBoolPref("flock.service.indexer.enabled")) {
- this._enable();
- } else {
- this._disable();
- }
- } else {
- this._enable();
- }
- if (prefService.getPrefType("flock.service.indexer.indexWebHistory")) {
- if (prefService.getBoolPref("flock.service.indexer.indexWebHistory")) {
- this._enableWebHistory();
- } else {
- this._disableWebHistory();
- }
- } else {
- this._enableWebHistory();
- }
- break;
- }
- },
-
- // nsIClassInfo
- getInterfaces: function INDEXER_getInterfaces(aCount) {
- var interfaces = [Ci.flockIIndexer, Ci.nsIClassInfo, Ci.nsIObserver,
- Ci.flockIRDFObserver, Ci.nsIRDFObserver,
- Ci.nsITimerCallback, Ci.flockILuceneListener,
- Ci.flockIMigratable, Ci.nsIDOMEventListener];
- aCount.value = interfaces.length;
- return interfaces;
- },
-
- // nsIClassInfo
- getHelperForLanguage: function INDEXER_getHelperForLanguage(aLanguage) {
- return null;
- },
-
- // nsIClassInfo
- contractID: CONTRACT_ID,
-
- // nsIClassInfo
- classDescription: CLASS_NAME,
-
- // nsIClassInfo
- classID: CLASS_ID,
-
- // nsIClassInfo
- implementationLanguage: Ci.nsIProgrammingLanguage.JAVASCRIPT,
-
- // nsIClassInfo
- flags: Ci.nsIClassInfo.SINGLETON,
-
- // nsISupports
- QueryInterface: function INDEXER_QueryInterface(aIID) {
- if (!aIID.equals(Ci.nsISupports) &&
- !aIID.equals(Ci.flockIIndexer) &&
- !aIID.equals(Ci.nsIClassInfo) &&
- !aIID.equals(Ci.nsIObserver) &&
- !aIID.equals(Ci.flockIRDFObserver) &&
- !aIID.equals(Ci.nsIRDFObserver) &&
- !aIID.equals(Ci.nsITimerCallback) &&
- !aIID.equals(Ci.flockILuceneListener) &&
- !aIID.equals(Ci.flockIMigratable) &&
- !aIID.equals(Ci.nsIDOMEventListener))
- throw Cr.NS_ERROR_NO_INTERFACE;
- return this;
- }
-
- };
-
- /******************************************************************************
- * XPCOM Functions for construction and registration
- ******************************************************************************/
/**
 * XPCOM module object: registers and unregisters the component factory and
 * its "flockMigratable" category entry, and hands out the factory.
 */
var Module = {
  _firstTime: true,

  /**
   * Registers the factory and adds the persistent "flockMigratable" category
   * entry. The very first call only throws
   * Cr.NS_ERROR_FACTORY_REGISTER_AGAIN; registration happens on the next
   * call.
   */
  registerSelf: function(aCompMgr, aFileSpec, aLocation, aType) {
    if (this._firstTime) {
      this._firstTime = false;
      throw Cr.NS_ERROR_FACTORY_REGISTER_AGAIN;
    }
    aCompMgr = aCompMgr.QueryInterface(Ci.nsIComponentRegistrar);
    aCompMgr.registerFactoryLocation(CLASS_ID, CLASS_NAME, CONTRACT_ID, aFileSpec, aLocation, aType);

    var catman = Cc["@mozilla.org/categorymanager;1"].getService(Ci.nsICategoryManager);
    catman.addCategoryEntry("flockMigratable", CLASS_NAME, CONTRACT_ID, true, true);
  },

  /**
   * Undoes registerSelf(): unregisters the factory and removes the
   * persistent category entry, so no entry is left pointing at an
   * unregistered contract ID.
   */
  unregisterSelf: function(aCompMgr, aLocation, aType) {
    aCompMgr = aCompMgr.QueryInterface(Ci.nsIComponentRegistrar);
    aCompMgr.unregisterFactoryLocation(CLASS_ID, aLocation);

    var catman = Cc["@mozilla.org/categorymanager;1"].getService(Ci.nsICategoryManager);
    catman.deleteCategoryEntry("flockMigratable", CLASS_NAME, true);
  },

  /**
   * @return Factory when asked for nsIFactory of CLASS_ID.
   * @throws Cr.NS_ERROR_NOT_IMPLEMENTED for any interface but nsIFactory,
   *         Cr.NS_ERROR_NO_INTERFACE for any other class ID.
   */
  getClassObject: function(aCompMgr, aCID, aIID) {
    if (!aIID.equals(Ci.nsIFactory)) {
      throw Cr.NS_ERROR_NOT_IMPLEMENTED;
    }
    if (aCID.equals(CLASS_ID)) {
      return Factory;
    }
    throw Cr.NS_ERROR_NO_INTERFACE;
  },

  canUnload: function(aCompMgr) { return true; }
};
-
/**
 * Factory for the indexer component.
 */
var Factory = {
  /**
   * @return a new flockIndexer, queried for aIID.
   * @throws Cr.NS_ERROR_NO_AGGREGATION when an outer object is supplied.
   */
  createInstance: function(aOuter, aIID) {
    if (aOuter != null) {
      throw Cr.NS_ERROR_NO_AGGREGATION;
    }
    var instance = new flockIndexer();
    return instance.QueryInterface(aIID);
  }
};
-
/** XPCOM entry point: returns this file's module object. */
function NSGetModule(aCompMgr, aFileSpec) {
  return Module;
}
-
-